library(tidyverse)
library(gapminder)

Fundamentals


Purpose

Importance

Why mathematical modeling is not enough

Advanced Visuals

Aesthetic Description Values
x x-axis data
y y-axis data
color color of dots, outline of other shapes
fill fill color Same as color
alpha transparency
size diameter of points, thickness of lines
linetype line dash pattern
labels text on a plot or axes see below
shape shape of dot

- Scaling an axis

# Keep only the 2007 observations; later examples reuse this subset.
gapminder_2007 <- gapminder %>%
  filter(year == 2007)

# GDP per capita vs. life expectancy with both axes rescaled.
ggplot(data = gapminder_2007, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  # Highlighted layers: log10-transform x and fix the y range to 0-100.
  scale_x_log10() +
  scale_y_continuous(limits = c(0, 100))

- Setting axis limit

# Same scatter plot, but the y axis is stretched so that it includes 30.
ggplot(data = gapminder_2007, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  # Highlighted layer: make sure the value 30 is inside the y-axis range.
  expand_limits(y = 30)

- Faceting

# One panel per continent, wrapped into a multi-row layout.
ggplot(data = gapminder_2007, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  # Highlighted layer: wrapped facets.
  facet_wrap(~continent)

# One panel per continent, laid out as the columns of a grid.
ggplot(data = gapminder_2007, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  # Highlighted layer: grid facets (one-sided formula, so columns only).
  facet_grid(~continent)

- Labeling

# Scatter plot with axis labels, title, subtitle and caption set explicitly.
ggplot(data = gapminder_2007, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  # Highlighted layer: all text annotations are supplied through labs().
  labs(
    title = "Impact of GDP on Life Expectancy",
    subtitle = "Economic Development as Human Development",
    x = "GDP Per Capita",
    y = "Life Expectancy",
    caption = "Gapminder, 2017"
  )

- Themes

# Full dataset, one panel per year, colored by continent and sized by
# population, rendered with a complete built-in theme.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~year) +
  labs(
    title = "Growing GDP and Life Expectancy",
    subtitle = "Converging Global Trends",
    x = "GDP Per Capita",
    y = "Life Expectancy",
    caption = "Gapminder, 2017"
  ) +
  # Highlighted layer: swap the default theme for theme_bw().
  theme_bw()

# Piecemeal: override individual theme elements. On its own this call only
# builds a theme object; add it to a plot with `+` to apply it.
theme(
  text = element_text(family = "Bookman"),
  title = element_text(color = "gray25"),
  plot.subtitle = element_text(size = 12)
)

# Project-wide theme: define it once, then add it to every plot.
#
# Takes no arguments. Returns theme_bw() with all text switched to the
# "Bookman" family and colored "gray25".
theme_myproject <- function() {
  base_theme <- theme_bw()
  project_text <- element_text(family = "Bookman", color = "gray25")
  base_theme + theme(text = project_text)
}

# Apply the project theme to a plain scatter plot of the full dataset.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  theme_myproject()

Scatter Plots


Basic Plot

Essential: x, y

# Subset to 2007 so that each country contributes exactly one point.
gapminder_2007 <- filter(gapminder, year == 2007)

# Minimal scatter plot: only the x and y aesthetics are mapped.
ggplot(data = gapminder_2007, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()


Let’s Get Fancy

Optional: Alpha, Color, Fill, Shape, Size

# Fill and size are mapped to data (continent, population); outline color,
# transparency and point shape are fixed for every point.
ggplot(
  data = gapminder_2007,
  mapping = aes(x = gdpPercap, y = lifeExp, fill = continent, size = pop)
) +
  geom_point(shape = 25, color = "black", alpha = 0.7)


Jittering

# Without jitter: points with the same continent sit on one vertical line.
ggplot(data = gapminder, mapping = aes(x = continent, y = lifeExp)) +
  geom_point()

# Default jitter: points are randomly displaced to reduce overplotting.
ggplot(data = gapminder, mapping = aes(x = continent, y = lifeExp)) +
  geom_point(position = "jitter")

# Controlled jitter: horizontal displacement is set explicitly to 0.1.
ggplot(data = gapminder, mapping = aes(x = continent, y = lifeExp)) +
  geom_point(position = position_jitter(width = 0.1))


Trend Lines

# Single linear trend line over all 2007 observations (log10 x axis).
ggplot(data = gapminder_2007, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm")

# One linear fit per continent plus a dashed fit across all points.
ggplot(
  data = gapminder_2007,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE) +
  # group = 1 pools every point into one fit; the constant color string adds
  # an "Overall Global" entry to the color legend.
  geom_smooth(
    mapping = aes(group = 1, color = "Overall Global"),
    method = "lm",
    se = FALSE,
    linetype = 2
  ) +
  # Six colors: one per continent plus one for the overall line.
  scale_color_manual(
    "Region",
    values = c("#D95F02", "orange", "#1B9E77", "blue", "purple", "black")
  )

Bar Plots


Histograms

Shows the binned distribution of a continuous variable

Essential: x

# Histogram of life expectancy using the default binning.
ggplot(data = gapminder_2007, mapping = aes(x = lifeExp)) +
  geom_histogram()

Optional: bin size

# Same histogram with bins that are 10 units (years of life) wide.
ggplot(data = gapminder_2007, mapping = aes(x = lifeExp)) +
  geom_histogram(binwidth = 10)

Optional: “y” (mapping y to density plots each bin’s density instead of its raw count)

# Density-scaled histogram: y shows each bin's density instead of its count.
# `after_stat(density)` replaces the `..density..` notation, which ggplot2
# deprecated in version 3.4.0.
ggplot(gapminder_2007, aes(x = lifeExp, y = after_stat(density))) +
  geom_histogram(binwidth = 10)

# Frequency polygon: the binned counts drawn as a line instead of bars.
ggplot(gapminder_2007, aes(x = lifeExp)) +
  geom_freqpoly(binwidth = 10)

# Histogram and frequency polygon overlaid. Both layers inherit the density
# mapping and use the same bin width, so they share one y scale.
ggplot(gapminder_2007, aes(x = lifeExp, y = after_stat(density))) +
  geom_histogram(binwidth = 5, fill = "grey50") +
  geom_freqpoly(binwidth = 5)


Absolute Count Bar

Shows a count of each category of the variable selected

# Bar height is the number of rows (countries) in each continent.
ggplot(data = gapminder_2007, mapping = aes(x = continent)) +
  geom_bar()


Distribution Bar

Shows a selected statistic for each category of the variable selected

# Mean and standard deviation of 2007 life expectancy for each continent.
life_by_cont <- gapminder_2007 %>%
  group_by(continent) %>%
  summarize(mean_life = mean(lifeExp), sd_life = sd(lifeExp))

# Bar height is the precomputed mean (stat = "identity" plots y as given).
ggplot(life_by_cont, aes(x = continent, y = mean_life)) +
  geom_bar(stat = "identity")

# Same bars with error bars spanning mean +/- one standard deviation.
ggplot(life_by_cont, aes(x = continent, y = mean_life)) +
  geom_bar(stat = "identity", fill = "grey50") +
  geom_errorbar(
    aes(ymin = mean_life - sd_life, ymax = mean_life + sd_life),
    # The cap width is a fixed setting, not a data mapping, so it belongs
    # outside aes().
    width = 0.2
  )


Multi-Group Bar

Shows multiple bar graphs on one plot

-Stacked

# Total population per year and continent. pop is converted to numeric
# before summing; ungroup() drops the grouping left over from summarize().
yearly_pop <- gapminder %>%
  group_by(year, continent) %>%
  summarize(tot_pop = sum(as.numeric(pop))) %>%
  ungroup()

# Stacked bars: continents are stacked on top of each other within a year.
ggplot(
  data = yearly_pop,
  mapping = aes(x = year, y = tot_pop, fill = continent)
) +
  geom_bar(stat = "identity")

-Proportional Fill

# Total population per year and continent (recomputed so that this example
# can be run on its own).
yearly_pop <- gapminder %>%
  group_by(year, continent) %>%
  summarize(tot_pop = sum(as.numeric(pop))) %>%
  ungroup()

# Proportional bars: every year is rescaled to the same total height, so
# each segment shows a continent's share of that year's population.
ggplot(
  data = yearly_pop,
  mapping = aes(x = year, y = tot_pop, fill = continent)
) +
  geom_bar(stat = "identity", position = "fill")

-Separate Groups

# Total population per year and continent (recomputed so that this example
# can be run on its own).
yearly_pop <- gapminder %>%
  group_by(year, continent) %>%
  summarize(tot_pop = sum(as.numeric(pop))) %>%
  ungroup()

# Dodged bars: continents are drawn side by side within each year.
ggplot(
  data = yearly_pop,
  mapping = aes(x = year, y = tot_pop, fill = continent)
) +
  geom_bar(stat = "identity", position = "dodge")

Line Plots


Basic

Required: x, y

# Sum of population over all rows for each year (pop is converted to
# numeric before summing).
total_pop <- gapminder %>%
  group_by(year) %>%
  summarize(pop = sum(as.numeric(pop)))

# Minimal line plot: one line of total population over time.
ggplot(data = total_pop, mapping = aes(x = year, y = pop)) +
  geom_line()


Advanced

Optional: Color, Size, Alpha, Line Type

# Per-continent yearly totals joined to the overall yearly totals. Both
# tables have a `pop` column, so the join's default suffixes apply:
#   pop.x = continent total (left side), pop.y = value from total_pop.
all_pop <- gapminder %>%
  group_by(year, continent) %>%
  summarize(pop = sum(as.numeric(pop))) %>%
  left_join(y = total_pop, by = "year")

# Solid line: pop.y, the value joined in from total_pop.
# Dashed lines: pop.x, the per-continent totals, colored by continent.
ggplot(all_pop) +
  # `linewidth` replaces `size` for line thickness; using `size` for lines
  # was deprecated in ggplot2 3.4.0.
  geom_line(aes(x = year, y = pop.y), linewidth = 1) +
  geom_line(aes(x = year, y = pop.x, color = continent), linetype = 2)

# Population summed per year and continent.
country_totals <- gapminder %>%
  group_by(year, continent) %>%
  summarize(pop = sum(as.numeric(pop)))

# Stacked area chart: absolute population, continents stacked.
ggplot(
  data = country_totals,
  mapping = aes(x = year, y = pop, fill = continent)
) +
  geom_area()

# Same summary, recomputed so that the next example can be run on its own.
country_totals <- gapminder %>%
  group_by(year, continent) %>%
  summarize(pop = sum(as.numeric(pop)))

# Proportional area chart: each year is rescaled to the same total height.
ggplot(
  data = country_totals,
  mapping = aes(x = year, y = pop, fill = continent)
) +
  geom_area(position = "fill")

# Start and end dates of the recession periods to shade, stored as Dates.
recessions <- tibble(
  begin = as.Date(c(
    "1969-12-01", "1973-11-01", "1980-01-01",
    "1981-07-01", "1990-07-01", "2001-03-01"
  )),
  end = as.Date(c(
    "1970-11-01", "1975-03-01", "1980-07-01",
    "1982-11-01", "1991-03-01", "2001-11-01"
  ))
)

# Unemployment as a share of population over time, with each recession
# drawn as a translucent red band behind the line.
ggplot(data = economics, mapping = aes(x = date, y = unemploy / pop)) +
  # The rectangles use their own data frame, so the plot-level aesthetics
  # are switched off; -Inf/Inf stretch each band over the full panel height.
  geom_rect(
    data = recessions,
    mapping = aes(xmin = begin, xmax = end, ymin = -Inf, ymax = Inf),
    inherit.aes = FALSE,
    fill = "red",
    alpha = 0.2
  ) +
  geom_line() +
  labs(
    title = "Impact of Recessions on Unemployment",
    x = "",
    y = "Unemployment Rate"
  )

Box Plots

# Subset to 2007 (recomputed so that this section can be run on its own).
gapminder_2007 <- filter(gapminder, year == 2007)

# One box per continent summarizing the distribution of life expectancy.
ggplot(data = gapminder_2007, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot()